#  ██████╗ ██╗  ██╗██╗   ██╗██╗      ██████╗ ██████╗ ██╗  ██╗███████╗██████╗ ███████╗
#  ██╔══██╗██║  ██║╚██╗ ██╔╝██║     ██╔═══██╗██╔══██╗██║  ██║██╔════╝██╔══██╗██╔════╝
#  ██████╔╝███████║ ╚████╔╝ ██║     ██║   ██║██████╔╝███████║█████╗  ██████╔╝█████╗
#  ██╔═══╝ ██╔══██║  ╚██╔╝  ██║     ██║   ██║██╔═══╝ ██╔══██║██╔══╝  ██╔══██╗██╔══╝
#  ██║     ██║  ██║   ██║   ███████╗╚██████╔╝██║     ██║  ██║███████╗██║  ██║███████╗
#  ╚═╝     ╚═╝  ╚═╝   ╚═╝   ╚══════╝ ╚═════╝ ╚═╝     ╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝╚══════╝

## PHYLOPHERE: A Nextflow pipeline including a complete set
## of phylogenetic comparative tools and analyses for Phenome-Genome studies

### Github: https://github.com/nozerorma/caastools/nf-phylophere
### Author: Miguel Ramon (miguel.ramon@upf.edu)

#### File: CI-composition.Rmd

Computing Credibility Intervals using Jeffreys Method

This script computes 95% credibility intervals (CIs) for the trait of interest:


Setup and Directory Configuration

This section configures the working environment, sets directories, and loads necessary functions and libraries.

# Call the setup function from commons.R
source("./obj/commons.R")
## [DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | malignant_prevalence | adult_necropsy_count | malignant_count | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | neoplasia_prevalence | LQ
## [DEBUG] seed_val = 1998
## [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/e6/30761e30fd9009c710237054109a64
## [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/e6/30761e30fd9009c710237054109a64/obj
## [DEBUG] resultsDir = data_exploration
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## [1] "Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/e6/30761e30fd9009c710237054109a64"
## [1] "Results Directory: data_exploration"
## [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/e6/30761e30fd9009c710237054109a64
## [DEBUG] Results Directory: data_exploration
## [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration
## [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution
## [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots
## [DEBUG] asr_trees = data_exploration/1.Data-exploration/3.ASR_trees
## [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/4.Phylogenetic_distribution
## [DEBUG] ci_dir = data_exploration/1.Data-exploration/5.CI_overlaps
## [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17
## [1] "Loading trait data from: cancer_traits_processed-LQ.csv"
## [DEBUG] trait_path = cancer_traits_processed-LQ.csv
## [DEBUG] trait_df rows = 47, cols = 19
## [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ
## [DEBUG] trait_df species unique = 47
## [DEBUG] tree_path = science.abn7829_data_s4.nex.tree
## [DEBUG] tree tips = 236, nodes = 235
## [DEBUG] clade_name = primates
## [DEBUG] taxon_of_interest = family
## [DEBUG] trait = malignant_prevalence
## [DEBUG] n_trait = malignant_count
## [DEBUG] p_trait = adult_necropsy_count
## [DEBUG] has.p = TRUE, p missing = 0
## [DEBUG] has.n = TRUE, n missing = 0
## Warning: package 'ape' was built under R version 4.4.2
## 
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
## 
##     where
## [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv
## [DEBUG] tax_id_df rows = 1290, distinct taxa = 807
## [DEBUG] has.TAX_ID = TRUE
## [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0
## [DEBUG] normalized tax_id from merged columns, missing tax_id = 0
## [DEBUG] tree_ids rows = 40, missing tax_id = 0
## [DEBUG] common_tax_ids = 40
## [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39
## [DEBUG] trait_df after TAX_ID tree filter rows = 40
## [DEBUG] secondary_trait = neoplasia_prevalence
## [DEBUG] has.secondary = TRUE, missing = 0
## [DEBUG] branch_trait = LQ
## [DEBUG] has.branch = TRUE, missing = 0
setup_rmd()

# Debug helper (prints into HTML output)
if (is.null(getOption("phylo_debug"))) {
  options(phylo_debug = TRUE)
}
if (!exists("phylo_debug_log", envir = .GlobalEnv)) {
  phylo_debug_log <- character()
}
if (!exists("debug_log", inherits = TRUE)) {
  debug_log <- function(...) {
    msg <- sprintf(...)
    phylo_debug_log <<- c(phylo_debug_log, msg)
    cat("[DEBUG] ", msg, "\n", sep = "")
  }
}

# Load additional libraries
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(patchwork)
library(ggtext)

color_palette <- paste0(clade_name, "_palette") # Color palette for the clade
capitalized_taxon <- tools::toTitleCase(taxon_of_interest) # Capitalized taxon name
capitalized_trait <- tools::toTitleCase(gsub("_", " ", trait)) # Capitalized and deconvoluted trait name
capitalized_clade <- tools::toTitleCase(gsub("_", " ", clade_name)) # Capitalized and deconvoluted clade name

createDir(ci_dir)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/5.CI_overlaps

1. Confidence Interval Computation

This section calculates 95% confidence intervals for malignant and neoplasia prevalence per species using both Wilson and Jeffreys methods.

# Compute CIs directly using DescTools and binom packages
trait_ci <- trait_df %>%
  group_by(species) %>%
  dplyr::rename(n_population = .data[[p_trait]],
                n_observation = .data[[n_trait]],
                trait = .data[[trait]]) %>%
  dplyr::mutate(
    # Jeffreys method CIs
    trait_ci_lb = binom::binom.confint(n_observation, n_population, 
                                            method = "bayes", priors = c(0.5, 0.5), 
                                            conf.level = 0.95)[, 5],
    trait_ci_ub = binom::binom.confint(n_observation, n_population, 
                                            method = "bayes", priors = c(0.5, 0.5), 
                                            conf.level = 0.95)[, 6],
    trait_ci = paste0("[", round(trait_ci_lb, 2), "-", round(trait_ci_ub, 2), "]")
  ) %>%
  ungroup()
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `all_of(var)` (or `any_of(var)`) instead of `.data[[var]]`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Save results
write.csv(trait_ci, file.path(ci_dir, "trait_CI.csv"), 
          quote = FALSE, row.names = FALSE)

# Display preview
head(trait_ci)
## # A tibble: 6 × 22
##   species              common_name        family n_population neoplasia_necropsy
##   <chr>                <chr>              <chr>         <int>              <int>
## 1 Alouatta_caraya      Black howler       Ateli…           32                  4
## 2 Ateles_geoffroyi     Black-handed spid… Ateli…           43                 13
## 3 Callimico_goeldii    Goeldi's monkey    Cebid…           69                 15
## 4 Callithrix_geoffroyi White-fronted mar… Cebid…           31                  3
## 5 Callithrix_jacchus   Common marmoset    Cebid…           41                  1
## 6 Cebuella_pygmaea     Pygmy marmoset     Cebid…           41                  4
## # ℹ 17 more variables: neoplasia_prevalence <dbl>, benign_count <int>,
## #   benign_prevalence <dbl>, n_observation <int>, trait <dbl>, body_mass <dbl>,
## #   mass_g <dbl>, log_body_mass <dbl>, mature_f <dbl>, mature_m <dbl>,
## #   mature_age <dbl>, MLS.anage <dbl>, LQ <dbl>, tax_id <int>,
## #   trait_ci_lb <dbl>, trait_ci_ub <dbl>, trait_ci <chr>

3. Pairwise Comparison Preparation

This section prepares data for comparing cancer prevalence between all pairs of species.

# Helper function to create pairwise comparisons for a given method
create_pairwise_data <- function(ci_data) {
  # Define column names for lower and upper bounds  
  ci_subset <- ci_data %>%
    dplyr::select(species, n_population, 
          n_observation, trait, 
          trait_ci_lb, trait_ci_ub)
  
  # Generate all pairwise species combinations
  pairwise <- expand.grid(species1 = ci_subset$species,
                          species2 = ci_subset$species,
                          stringsAsFactors = FALSE)
  
  # Merge species data for each pair
  pairwise <- pairwise %>%
    left_join(ci_subset %>% rename_with(~paste0(.x, "1"), .cols = -species),
              by = c("species1" = "species")) %>%
    left_join(ci_subset %>% rename_with(~paste0(.x, "2"), .cols = -species),
              by = c("species2" = "species"))
  
  pairwise <- pairwise %>%
    mutate(
      trait_diff = trait1 - trait2,
      trait_overlap = ci_overlap(
        trait_ci_lb1, trait_ci_ub1, trait_ci_lb2, trait_ci_ub2
      ),
      diff_value = trait_diff,
      overlap_use = trait_overlap
    )
  
  return(pairwise)
}

# Create pairwise data
pairwise_data <- create_pairwise_data(trait_ci)

# Save results
write.csv(pairwise_data, file.path(ci_dir, "pairwise_data.csv"), 
          quote = FALSE, row.names = FALSE)

# Display preview
head(pairwise_data)
##               species1        species2 n_population1 n_observation1     trait1
## 1      Alouatta_caraya Alouatta_caraya            32              1 0.03125000
## 2     Ateles_geoffroyi Alouatta_caraya            43              8 0.18604651
## 3    Callimico_goeldii Alouatta_caraya            69              7 0.10144928
## 4 Callithrix_geoffroyi Alouatta_caraya            31              1 0.03225806
## 5   Callithrix_jacchus Alouatta_caraya            41              1 0.02439024
## 6     Cebuella_pygmaea Alouatta_caraya            41              2 0.04878049
##   trait_ci_lb1 trait_ci_ub1 n_population2 n_observation2  trait2 trait_ci_lb2
## 1 6.362256e-05   0.11585204            32              1 0.03125 6.362256e-05
## 2 8.440834e-02   0.30990210            32              1 0.03125 6.362256e-05
## 3 4.082088e-02   0.18007430            32              1 0.03125 6.362256e-05
## 4 6.621844e-05   0.11938682            32              1 0.03125 6.362256e-05
## 5 4.695278e-05   0.09147094            32              1 0.03125 6.362256e-05
## 6 4.027234e-03   0.13016665            32              1 0.03125 6.362256e-05
##   trait_ci_ub2   trait_diff trait_overlap   diff_value overlap_use
## 1     0.115852  0.000000000          TRUE  0.000000000        TRUE
## 2     0.115852  0.154796512          TRUE  0.154796512        TRUE
## 3     0.115852  0.070199275          TRUE  0.070199275        TRUE
## 4     0.115852  0.001008065          TRUE  0.001008065        TRUE
## 5     0.115852 -0.006859756          TRUE -0.006859756        TRUE
## 6     0.115852  0.017530488          TRUE  0.017530488        TRUE

4. Difference Matrix Creation

This section transforms pairwise data into matrix format for visualization.

# Helper function to build difference matrix
build_diff_matrix <- function(pairwise_data) {
  pairwise_data %>%
    select(species1, species2, diff_value) %>%
    pivot_wider(names_from = species2, values_from = diff_value) %>%
    column_to_rownames("species1") %>%
    as.matrix()
}

# Create matrices
matrix <- build_diff_matrix(pairwise_data)

# Save matrices
write.csv(matrix, file.path(ci_dir, "diff_matrix-CI.csv"), 
          quote = FALSE, row.names = TRUE)

# Display preview
head(matrix)
##                      Alouatta_caraya Ateles_geoffroyi Callimico_goeldii
## Alouatta_caraya          0.000000000      -0.15479651       -0.07019928
## Ateles_geoffroyi         0.154796512       0.00000000        0.08459724
## Callimico_goeldii        0.070199275      -0.08459724        0.00000000
## Callithrix_geoffroyi     0.001008065      -0.15378845       -0.06919121
## Callithrix_jacchus      -0.006859756      -0.16165627       -0.07705903
## Cebuella_pygmaea         0.017530488      -0.13726602       -0.05266879
##                      Callithrix_geoffroyi Callithrix_jacchus Cebuella_pygmaea
## Alouatta_caraya              -0.001008065        0.006859756      -0.01753049
## Ateles_geoffroyi              0.153788447        0.161656268       0.13726602
## Callimico_goeldii             0.069191211        0.077059031       0.05266879
## Callithrix_geoffroyi          0.000000000        0.007867821      -0.01652242
## Callithrix_jacchus           -0.007867821        0.000000000      -0.02439024
## Cebuella_pygmaea              0.016522423        0.024390244       0.00000000
##                      Cercopithecus_neglectus Colobus_guereza Eulemur_macaco
## Alouatta_caraya                 -0.004464286    -0.009987113    -0.16875000
## Ateles_geoffroyi                 0.150332226     0.144809398    -0.01395349
## Callimico_goeldii                0.065734990     0.060212162    -0.09855072
## Callithrix_geoffroyi            -0.003456221    -0.008979049    -0.16774194
## Callithrix_jacchus              -0.011324042    -0.016846869    -0.17560976
## Cebuella_pygmaea                 0.013066202     0.007543374    -0.15121951
##                      Galago_moholi Galago_senegalensis Gorilla_gorilla
## Alouatta_caraya        -0.03125000         0.016098485     0.001399254
## Ateles_geoffroyi        0.12354651         0.170894996     0.156195765
## Callimico_goeldii       0.03894928         0.086297760     0.071598529
## Callithrix_geoffroyi   -0.03024194         0.017106549     0.002407318
## Callithrix_jacchus     -0.03810976         0.009238729    -0.005460502
## Cebuella_pygmaea       -0.01371951         0.033628973     0.018929742
##                      Hylobates_lar Lemur_catta Leontopithecus_chrysomelas
## Alouatta_caraya        -0.12784091 -0.09989754                -0.08639706
## Ateles_geoffroyi        0.02695560  0.05489897                 0.06839945
## Callimico_goeldii      -0.05764163 -0.02969827                -0.01619778
## Callithrix_geoffroyi   -0.12683284 -0.09888948                -0.08538899
## Callithrix_jacchus     -0.13470067 -0.10675730                -0.09325681
## Cebuella_pygmaea       -0.11031042 -0.08236705                -0.06886657
##                      Leontopithecus_rosalia Macaca_fascicularis Macaca_fuscata
## Alouatta_caraya                -0.024305556          0.03125000    0.007994186
## Ateles_geoffroyi                0.130490956          0.18604651    0.162790698
## Callimico_goeldii               0.045893720          0.10144928    0.078193461
## Callithrix_geoffroyi           -0.023297491          0.03225806    0.009002251
## Callithrix_jacchus             -0.031165312          0.02439024    0.001134430
## Cebuella_pygmaea               -0.006775068          0.04878049    0.025524674
##                      Macaca_nigra Macaca_silenus Mandrillus_sphinx
## Alouatta_caraya       0.003472222    0.001838235      -0.013194444
## Ateles_geoffroyi      0.158268734    0.156634747       0.141602067
## Callimico_goeldii     0.073671498    0.072037511       0.057004831
## Callithrix_geoffroyi  0.004480287    0.002846300      -0.012186380
## Callithrix_jacchus   -0.003387534   -0.005021521      -0.020054201
## Cebuella_pygmaea      0.021002710    0.019368723       0.004336043
##                      Mico_argentatus Microcebus_murinus Nycticebus_coucang
## Alouatta_caraya           0.03125000        -0.21436404        -0.05208333
## Ateles_geoffroyi          0.18604651        -0.05956752         0.10271318
## Callimico_goeldii         0.10144928        -0.14416476         0.01811594
## Callithrix_geoffroyi      0.03225806        -0.21335597        -0.05107527
## Callithrix_jacchus        0.02439024        -0.22122379        -0.05894309
## Cebuella_pygmaea          0.04878049        -0.19683355        -0.03455285
##                      Nycticebus_pygmaeus Pan_troglodytes Papio_hamadryas
## Alouatta_caraya              -0.19323980      0.02072368      0.02154126
## Ateles_geoffroyi             -0.03844328      0.17552020      0.17633777
## Callimico_goeldii            -0.12304052      0.09092296      0.09174054
## Callithrix_geoffroyi         -0.19223173      0.02173175      0.02254933
## Callithrix_jacchus           -0.20009955      0.01386393      0.01468151
## Cebuella_pygmaea             -0.17570931      0.03825417      0.03907175
##                      Propithecus_coquereli Saguinus_bicolor Saguinus_imperator
## Alouatta_caraya                -0.04875000     -0.008750000         0.03125000
## Ateles_geoffroyi                0.10604651      0.146046512         0.18604651
## Callimico_goeldii               0.02144928      0.061449275         0.10144928
## Callithrix_geoffroyi           -0.04774194     -0.007741935         0.03225806
## Callithrix_jacchus             -0.05560976     -0.015609756         0.02439024
## Cebuella_pygmaea               -0.03121951      0.008780488         0.04878049
##                      Saguinus_midas Saguinus_oedipus Saimiri_sciureus
## Alouatta_caraya          0.03125000      -0.04337687      -0.02047414
## Ateles_geoffroyi         0.18604651       0.11141965       0.13432237
## Callimico_goeldii        0.10144928       0.02682241       0.04972514
## Callithrix_geoffroyi     0.03225806      -0.04236880      -0.01946607
## Callithrix_jacchus       0.02439024      -0.05023662      -0.02733389
## Cebuella_pygmaea         0.04878049      -0.02584638      -0.00294365
##                      Sapajus_apella Theropithecus_gelada Trachypithecus_auratus
## Alouatta_caraya          0.03125000          0.012019231           -0.005787037
## Ateles_geoffroyi         0.18604651          0.166815742            0.149009475
## Callimico_goeldii        0.10144928          0.082218506            0.064412238
## Callithrix_geoffroyi     0.03225806          0.013027295           -0.004778973
## Callithrix_jacchus       0.02439024          0.005159475           -0.012646793
## Cebuella_pygmaea         0.04878049          0.029549719            0.011743451
##                      Trachypithecus_cristatus Trachypithecus_francoisi
## Alouatta_caraya                    0.03125000             -0.068750000
## Ateles_geoffroyi                   0.18604651              0.086046512
## Callimico_goeldii                  0.10144928              0.001449275
## Callithrix_geoffroyi               0.03225806             -0.067741935
## Callithrix_jacchus                 0.02439024             -0.075609756
## Cebuella_pygmaea                   0.04878049             -0.051219512
##                      Varecia_rubra Varecia_variegata
## Alouatta_caraya        -0.10434322       -0.13980263
## Ateles_geoffroyi        0.05045329        0.01499388
## Callimico_goeldii      -0.03414394       -0.06960336
## Callithrix_geoffroyi   -0.10333516       -0.13879457
## Callithrix_jacchus     -0.11120298       -0.14666239
## Cebuella_pygmaea       -0.08681273       -0.12227214

5. Heatmap Visualization

This section creates heatmaps to visualize pairwise differences in prevalence. Species labels are colored based on prevalence quantiles.

# Simplified function to create colored labels based on trait quantiles
create_colored_labels <- function(trait_data) {
  # Calculate quantile thresholds
  q75 <- quantile(trait_data$trait, 0.75, na.rm = TRUE)
  q25 <- quantile(trait_data$trait, 0.25, na.rm = TRUE)
  
  # Create a lookup table with species and their colors
  label_df <- trait_data %>%
    mutate(
      color = case_when(
        trait >= q75 ~ "#D73027",  # Red for top quartile
        trait <= q25 ~ "#1A9850",  # Green for bottom quartile
        TRUE ~ "#000000"           # Black for middle
      )
    ) %>%
    select(species, color)
  
  # Create named vector of colored HTML labels
  colored_labels <- setNames(
    paste0("<span style='color:", label_df$color, ";'>", label_df$species, "</span>"),
    label_df$species
  )
  
  return(colored_labels)
}

# Helper function to create legend plot
create_legend_plot <- function() {
  legend_df <- tibble(
    category = c("Top", "Bottom"),
    color = c("#D73027", "#1A9850")
  )
  
  ggplot(legend_df, aes(x = 1, y = category, color = category)) +
    geom_point(size = 0) +
    scale_color_manual(
      values = setNames(legend_df$color, legend_df$category),
      guide = guide_legend(override.aes = list(size = 5))
    ) +
    labs(color = "Trait Categories") +
    theme_void() +
    theme(
      legend.position = "right",
      legend.title = element_text(size = 12),
      legend.text = element_text(size = 10)
    )
}

# Helper function to create heatmap
create_heatmap <- function(pairwise_data, colored_labels, 
                          highlight_nonoverlap = TRUE) {
  # Ensure label mapping is stable across axes
  if (is.null(names(colored_labels))) {
    stop("colored_labels must be a named vector with species names.")
  }
  pairwise_data <- pairwise_data %>%
    mutate(
      species1 = factor(species1, levels = names(colored_labels)),
      species2 = factor(species2, levels = names(colored_labels))
    )

  p <- ggplot(pairwise_data, aes(x = species2, y = species1, fill = abs(diff_value))) +
    geom_tile(color = "black", size = 0.25) +
    geom_text(aes(label = sprintf("%.2f", abs(diff_value))), size = 3) +
    scale_fill_gradient(low = "white", high = "salmon3", name = "Difference") +
    scale_x_discrete(labels = function(x) colored_labels[x]) +
    scale_y_discrete(labels = function(y) colored_labels[y]) +
    theme_minimal() +
    theme(
      axis.text.x = element_markdown(angle = 45, hjust = 1),
      axis.text.y = element_markdown(angle = 0),
      panel.grid = element_blank(),
      legend.position = "none"
    )
  
  # Add bold border for non-overlapping CIs if requested
  if (highlight_nonoverlap) {
    p <- p + 
      geom_tile(data = filter(pairwise_data, !overlap_use), 
                fill = NA, color = "black", size = 1.5) +
      labs(
        x = "Species", y = "Species",
        title = paste("Pairwise Differences in trait: ", capitalized_trait, " with Non-overlapping CIs", sep = ""),
        caption = "Bold border: non-overlapping CIs."
      )
  } else {
    p <- p +
      labs(
        x = "Species", y = "Species",
        title = paste("Pairwise Differences in trait: ", capitalized_trait, sep = ""),
        caption = "Bold border: non-overlapping CIs."
      )
  }
  
  return(p)
}

# Create colored labels directly from trait data
my_labels_colored <- create_colored_labels(trait_ci)

# Save colored labels
write.csv(my_labels_colored, file.path(ci_dir, "my_labels_colored.csv"), 
          quote = FALSE, row.names = TRUE)

# Create legend
legend_plot <- create_legend_plot()

# Create heatmaps with non-overlapping CI highlights
p_overlap <- create_heatmap(pairwise_data, my_labels_colored, highlight_nonoverlap = TRUE) + 
  legend_plot + plot_layout(ncol = 2, widths = c(1000, 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p_flat <- create_heatmap(pairwise_data, my_labels_colored, highlight_nonoverlap = FALSE) + 
  legend_plot + plot_layout(ncol = 2, widths = c(1000, 1))

# Save heatmaps
ggsave(file.path(ci_dir, "visual_matrix_no_overlap.png"), p_overlap, 
       width = 15, height = 12, dpi = "retina")
ggsave(file.path(ci_dir, "visual_matrix_flat.png"), p_flat, 
       width = 15, height = 12, dpi = "retina")

# Display plots
p_overlap

p_flat

6. Polished Tables Preparation

This section creates readable summary tables for cancer traits and pairwise comparisons.

# Polished table for cancer traits
trait_ci_polished <- trait_ci %>%
  mutate(
    trait = round(trait, 3),
  ) %>%
  dplyr::select(species, n_population, n_observation, 
        trait, trait_ci_lb, trait_ci_ub, trait_ci) %>%
  dplyr::rename(
    "Trait" = trait,
    "Trait CI Lower Bound" = trait_ci_lb,
    "Trait CI Upper Bound" = trait_ci_ub,
    "Trait CI" = trait_ci
  )

write.csv(trait_ci_polished, 
          file.path(ci_dir, "trait_CI_polished.csv"), 
          quote = FALSE, row.names = FALSE)

# Polished pairwise table - recreate from original data with all CI columns
trait_diff <- trait_ci %>%
  dplyr::select(species, n_population , n_observation, trait,
        trait_ci_lb, trait_ci_ub, trait_ci)

# Generate all pairwise species combinations
pairwise_data_full <- expand.grid(species1 = trait_diff$species,
                                   species2 = trait_diff$species,
                                   stringsAsFactors = FALSE) %>%
  left_join(trait_diff %>% rename_with(~paste0(.x, "1"), .cols = -species),
            by = c("species1" = "species")) %>%
  left_join(trait_diff %>% rename_with(~paste0(.x, "2"), .cols = -species),
            by = c("species2" = "species"))

# Create polished pairwise table
pairwise_data_end_polished <- pairwise_data_full %>%
  dplyr::mutate(
    trait_diff = abs(trait1 - trait2),
    trait_diff = round(trait_diff, 3),
    # Format CIs for both Wilson and Jeffreys methods
    trait_CI1 = sprintf("[%.2f-%.2f]", 
                                 as.numeric(trait_ci_lb1), 
                                 as.numeric(trait_ci_ub1)),
    trait_CI2 = sprintf("[%.2f-%.2f]", 
                                 as.numeric(trait_ci_lb2), 
                                 as.numeric(trait_ci_ub2)),
    diff_value = trait_diff
  ) %>%
  dplyr::select(species1, species2, 
         trait_diff, trait_CI1, trait_CI2) %>%
  dplyr::rename(
    "Trait Difference" = trait_diff,
    "Trait CI1" = trait_CI1,        
    "Trait CI2" = trait_CI2
  )

write.csv(pairwise_data_end_polished, 
          file.path(ci_dir, "pairwise_data_end_polished.csv"), 
          quote = FALSE, row.names = FALSE)

# Display preview
head(trait_ci_polished)
## # A tibble: 6 × 7
##   species              n_population n_observation Trait `Trait CI Lower Bound`
##   <chr>                       <int>         <int> <dbl>                  <dbl>
## 1 Alouatta_caraya                32             1 0.031              0.0000636
## 2 Ateles_geoffroyi               43             8 0.186              0.0844   
## 3 Callimico_goeldii              69             7 0.101              0.0408   
## 4 Callithrix_geoffroyi           31             1 0.032              0.0000662
## 5 Callithrix_jacchus             41             1 0.024              0.0000470
## 6 Cebuella_pygmaea               41             2 0.049              0.00403  
## # ℹ 2 more variables: `Trait CI Upper Bound` <dbl>, `Trait CI` <chr>

Session Information

if (length(phylo_debug_log) > 0) {
  cat("### Debug log\n")
  cat(paste0("[DEBUG] ", phylo_debug_log, "\n"))
}

Debug log

[DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | malignant_prevalence | adult_necropsy_count | malignant_count | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | neoplasia_prevalence | LQ [DEBUG] seed_val = 1998 [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/e6/30761e30fd9009c710237054109a64 [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/e6/30761e30fd9009c710237054109a64/obj [DEBUG] resultsDir = data_exploration [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/e6/30761e30fd9009c710237054109a64 [DEBUG] Results Directory: data_exploration [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots [DEBUG] asr_trees = data_exploration/1.Data-exploration/3.ASR_trees [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/4.Phylogenetic_distribution [DEBUG] ci_dir = data_exploration/1.Data-exploration/5.CI_overlaps [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17 [DEBUG] trait_path = cancer_traits_processed-LQ.csv [DEBUG] trait_df rows = 47, cols = 19 [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ [DEBUG] trait_df species unique = 47 [DEBUG] tree_path = science.abn7829_data_s4.nex.tree [DEBUG] tree tips = 236, nodes = 235 [DEBUG] clade_name = primates [DEBUG] taxon_of_interest = family [DEBUG] trait = malignant_prevalence [DEBUG] n_trait = malignant_count [DEBUG] p_trait = adult_necropsy_count [DEBUG] has.p = TRUE, p missing = 0 [DEBUG] has.n = TRUE, n missing = 0 [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv [DEBUG] tax_id_df rows = 1290, distinct taxa = 807 [DEBUG] has.TAX_ID = TRUE [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0 [DEBUG] normalized tax_id from merged columns, missing tax_id = 0 [DEBUG] tree_ids rows = 40, missing tax_id = 0 [DEBUG] common_tax_ids = 40 [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39 [DEBUG] trait_df after TAX_ID tree filter rows = 40 [DEBUG] secondary_trait = neoplasia_prevalence [DEBUG] has.secondary = TRUE, missing = 0 [DEBUG] branch_trait = LQ [DEBUG] has.branch = TRUE, missing = 0 [DEBUG] createDir: created data_exploration/1.Data-exploration/5.CI_overlaps

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-conda-linux-gnu
## Running under: TUXEDO OS
## 
## Matrix products: default
## BLAS/LAPACK: /home/miguel/micromamba/envs/caas_global_cancer/lib/libopenblasp-r0.3.28.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Madrid
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggtext_0.1.2    patchwork_1.3.0 ggplot2_3.5.1   tibble_3.2.1   
## [5] ape_5.8-1       tidyr_1.3.1     dplyr_1.1.4     readr_2.1.5    
## 
## loaded via a namespace (and not attached):
##  [1] fastmatch_1.1-6         gtable_0.3.6            xfun_0.51              
##  [4] bslib_0.9.0             lattice_0.22-6          tzdb_0.5.0             
##  [7] numDeriv_2016.8-1.1     quadprog_1.5-8          vctrs_0.6.5            
## [10] tools_4.4.1             generics_0.1.3          parallel_4.4.1         
## [13] pkgconfig_2.0.3         Matrix_1.7-3            scatterplot3d_0.3-44   
## [16] lifecycle_1.0.4         binom_1.1-1.1           stringr_1.5.1          
## [19] compiler_4.4.1          farver_2.1.2            textshaping_1.0.0      
## [22] munsell_0.5.1           mnormt_2.1.1            combinat_0.0-8         
## [25] codetools_0.2-20        litedown_0.6            htmltools_0.5.8.1      
## [28] maps_3.4.2.1            sass_0.4.9              yaml_2.3.10            
## [31] pillar_1.10.1           crayon_1.5.3            jquerylib_0.1.4        
## [34] MASS_7.3-65             cachem_1.1.0            clusterGeneration_1.3.8
## [37] iterators_1.0.14        foreach_1.5.2           nlme_3.1-167           
## [40] phangorn_2.12.1         commonmark_1.9.5        tidyselect_1.2.1       
## [43] digest_0.6.37           stringi_1.8.4           purrr_1.0.4            
## [46] labeling_0.4.3          fastmap_1.2.0           grid_4.4.1             
## [49] colorspace_2.1-1        expm_1.0-0              cli_3.6.4              
## [52] magrittr_2.0.3          optimParallel_1.0-2     utf8_1.2.4             
## [55] withr_3.0.2             scales_1.3.0            DEoptim_2.2-8          
## [58] rmarkdown_2.29          igraph_2.1.4            ragg_1.3.3             
## [61] hms_1.1.3               coda_0.19-4.1           evaluate_1.0.3         
## [64] knitr_1.50              phytools_2.4-4          doParallel_1.0.17      
## [67] markdown_2.0            rlang_1.1.5             gridtext_0.1.5         
## [70] Rcpp_1.0.14             glue_1.8.0              xml2_1.3.8             
## [73] jsonlite_1.9.1          R6_2.6.1                systemfonts_1.2.1